{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "7c7b5f67",
   "metadata": {},
   "outputs": [],
   "source": [
    "from rdkit import Chem\n",
    "from rdkit.Chem import AllChem\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import re\n",
    "def mult(a,b):\n",
    "    return a/b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "3e7b4dbe",
   "metadata": {},
   "outputs": [],
   "source": [
    "mol_name = 'output_5.mol' \n",
    "mol = Chem.MolFromMolFile(mol_name)\n",
    "cif_name = f'data_cif{mol_name}'\n",
    "#名字是象征意义，封装时传递mol\n",
    "# 以mol传递mol文件\n",
    "m2 = Chem.AddHs(mol) # 加氢原子\n",
    "AllChem.EmbedMolecule(m2) # 2D->3D化\n",
    "AllChem.MMFFOptimizeMolecule(m2)   # 使用MMFF94最小化RDKit生成的构象\n",
    "#m3 = Chem.RemoveHs(m2) # 删除氢原子\n",
    "#print(Chem.MolToMolBlock(m3))\n",
    "m2.SetProp('_Name', 'cyclobutane')\n",
    "Chem.MolToMolFile(m2, 'output_5.txt')\n",
    "CAS_file=open('output_5.txt','r',encoding='utf-8')\n",
    "cas_line=CAS_file.readlines()\n",
    "top_1 = pd.DataFrame()\n",
    "top_2 = pd.DataFrame()\n",
    "test_0 = '0  0  0  0  0  0  0  0  0  0  0  0'\n",
    "my_re_eng = re.compile(r'[A-Za-z]',re.S)\n",
    "my_re_num = re.compile(r'[0-9]',re.S)\n",
    "a = 0\n",
    "for i in range(len(cas_line)):\n",
    "    res = re.findall(my_re_eng,cas_line[i])\n",
    "    if test_0 in cas_line[i]:\n",
    "        cif = cas_line[i].split()\n",
    "        top_1.insert(i-4,column=f\"atom{i-3}\",value=cif)\n",
    "    elif len(res) or cas_line[i]=='\\n':\n",
    "        pass\n",
    "    else:\n",
    "        a = a+1\n",
    "        cif = cas_line[i].split()\n",
    "        top_2.insert(0,column=f\"bond{a}\",value=cif)\n",
    "#提取出来了必要的信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "6166ed2d",
   "metadata": {},
   "outputs": [],
   "source": [
    "#loc_mult 计算后的原子位置\n",
    "rate = 40\n",
    "loc = top_1[0:3]\n",
    "loc = loc.astype(\"float\")\n",
    "loc_mult = loc.pipe(mult,rate) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "4c7711df",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "F1     F     0.04574     -0.0503     0.00313    0.00000  Uiso   1.00\n",
      "C2     C     0.023     -0.02573     0.00158    0.00000  Uiso   1.00\n",
      "C3     C     -0.01078     -0.03353     -0.00062    0.00000  Uiso   1.00\n",
      "C4     C     -0.03372     -0.0075     -0.00219    0.00000  Uiso   1.00\n",
      "O5     O     -0.06693     -0.01512     -0.00434    0.00000  Uiso   1.00\n",
      "C6     C     -0.023     0.02573     -0.00158    0.00000  Uiso   1.00\n",
      "F7     F     -0.04574     0.0503     -0.00313    0.00000  Uiso   1.00\n",
      "C8     C     0.01078     0.03353     0.00062    0.00000  Uiso   1.00\n",
      "C9     C     0.03372     0.0075     0.00219    0.00000  Uiso   1.00\n",
      "O10     O     0.06693     0.01512     0.00434    0.00000  Uiso   1.00\n",
      "H11     H     -0.01892     -0.05938     -0.00108    0.00000  Uiso   1.00\n",
      "H12     H     -0.07912     0.00601     -0.0052    0.00000  Uiso   1.00\n",
      "H13     H     0.01892     0.05938     0.00108    0.00000  Uiso   1.00\n",
      "H14     H     0.07912     -0.00601     0.0052    0.00000  Uiso   1.00\n"
     ]
    }
   ],
   "source": [
    "for i in range(1,loc_mult.shape[1]+1): \n",
    "    atom_num = '{0}{1}'.format(top_1['atom{}'.format(i)][3],i) #原子名，号\n",
    "    atom_type = top_1['atom{}'.format(i)][3] #原子名\n",
    "    atom_loc_0 = loc_mult['atom{}'.format(i)][0]\n",
    "    atom_loc_0 = round(atom_loc_0, 5)     #原子位置x\n",
    "    atom_loc_1 = loc_mult['atom{}'.format(i)][1]\n",
    "    atom_loc_1 = round(atom_loc_1, 5)     #原子位置y\n",
    "    atom_loc_2 = loc_mult['atom{}'.format(i)][2]\n",
    "    atom_loc_2 = round(atom_loc_2, 5)     #原子位置z\n",
    "    print(atom_num,'   ',atom_type,'   ',atom_loc_0,'   ',atom_loc_1,'   ',atom_loc_2,'   0.00000  Uiso   1.00')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "30701482",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "F1       C2        1.3408611188337143     .       S\n",
      "C2       C3        1.389734553790759     .       D\n",
      "C3       C4        1.3890574394171034     .       S\n",
      "C4       O5        1.3658819129046256     .       S\n",
      "C4       C6        1.3967411571225357     .       D\n",
      "C6       F7        1.3408611188337143     .       S\n",
      "C6       C8        1.389734553790759     .       S\n",
      "C8       C9        1.389061964060639     .       D\n",
      "C9       O10        1.3658756056098225     .       S\n",
      "C9       C2        1.3967429076247353     .       S\n",
      "C3       H11        1.0840256223909104     .       S\n",
      "O5       H12        0.9763505978899178     .       S\n",
      "C8       H13        1.0840256223909104     .       S\n",
      "O10       H14        0.9763505978899178     .       S\n"
     ]
    }
   ],
   "source": [
    "for j in range(1,top_2.shape[1]+1): #判断成键分子 、类型、及键长 \n",
    "    x = 0\n",
    "    ins_1 = '{0}{1}'.format(top_1['atom{}'.format(top_2['bond{}'.format(j)][0])][3],top_2['bond{}'.format(j)][0])#成键分子1\n",
    "    ins_2 = '{0}{1}'.format(top_1['atom{}'.format(top_2['bond{}'.format(j)][1])][3],top_2['bond{}'.format(j)][1]) #成键分子2 \n",
    "    if top_2['bond{}'.format(j)][2] == '1':#判断成键类型\n",
    "        band_type = 'S'\n",
    "    elif top_2['bond{}'.format(j)][2] == '2':\n",
    "        band_type = 'D'\n",
    "    elif top_2['bond{}'.format(j)][2] == '3':\n",
    "        band_type = 'T'\n",
    "    else:\n",
    "        pass\n",
    "    for i in range(3):  #判断成键类型\n",
    "        a = loc['atom{}'.format(top_2['bond{}'.format(j)][0])][i]\n",
    "        b = loc['atom{}'.format(top_2['bond{}'.format(j)][1])][i]\n",
    "        c = (a-b)**2\n",
    "        x = x + c\n",
    "    x = x**0.5\n",
    "    print(ins_1,'     ',ins_2,'      ',x,'   ','.','     ',band_type)\n",
    "    \n",
    "#整体第二块"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "7ed5c1bb94f52fd010b3d3ccf5daae836e7d243cc9c2685aabc4fdf5d9323919"
  },
  "kernelspec": {
   "display_name": "Python 3.7.11 ('csd')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
